In [1]:
# import common packages
import altair as alt
import pandas as pd
import os
from toolz.curried import pipe
from vega_datasets import data
from altair import datum
In [20]:
geo_race_urban_plot
Out[20]:
Loading and Processing Data:¶
In [2]:
# load dataset
processed_data= pd.read_csv("../../data/processed/cleaned_race_data.csv", low_memory=True)
processed_data.head(5)
# allow larger data
alt.data_transformers.disable_max_rows()
Out[2]:
DataTransformerRegistry.enable('default')
In [3]:
# Create column voteshare = Democrats
processed_data['pref_dems'] = (processed_data['Voting_Preference'] == 'Democrat').astype(int)
processed_data['share_dems'] = processed_data.groupby(['Year_of_Study', 'Age'])['pref_dems'].transform('mean')
processed_data.tail(5)
Out[3]:
| Year_of_Study | Age | Race | Education | Income_Group | State_Code_FIPS | State_Code | Voting_Preference | Urban | birth_year | pref_dems | share_dems | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 31817 | 1952 | 33 | White non-Hispanic | Grade school or less | 68 to 95 percentile | 36 | NY | Democrat | Suburban areas | 1919 | 1 | 0.454545 |
| 31818 | 1952 | 48 | White non-Hispanic | High school | 68 to 95 percentile | 39 | OH | Republican | Suburban areas | 1904 | 0 | 0.250000 |
| 31819 | 1952 | 63 | White non-Hispanic | High school | 34 to 67 percentile | 36 | NY | Republican | Suburban areas | 1889 | 0 | 0.222222 |
| 31820 | 1952 | 25 | Black non-Hispanic | High school | 17 to 33 percentile | 26 | MI | Democrat | Central cities | 1927 | 1 | 0.428571 |
| 31821 | 1952 | 35 | White non-Hispanic | Grade school or less | 68 to 95 percentile | 39 | OH | Republican | Central cities | 1917 | 0 | 0.447368 |
In [4]:
# Create column voteshare = Republicans and drop working columns
processed_data['pref_repubs'] = (processed_data['Voting_Preference'] == 'Republican').astype(int)
processed_data['share_repubs'] = processed_data.groupby(['Year_of_Study', 'Age'])['pref_repubs'].transform('mean')
# processed_data = processed_data.drop(columns=['pref_repubs', 'pref_dems'])
In [5]:
processed_data['Year_of_Study'] = processed_data['Year_of_Study'].astype(int)
In [ ]:
Coding Visualizations Elements¶
In [6]:
# Create Chart Interactions
# slider to select year
slider = alt.binding_range(
min=2004, max=2020, step=4, name='Year: ' # Range, step, and label
)
select_year = alt.selection_point(
fields=['Year_of_Study'], # Bound field
bind=slider, # Bind to slider control
value=2020, # Default selected year
)
# slider to select birth year
select_birth_year = alt.selection_point(
fields=['birth_year'],
bind= alt.binding_range(min=1910, max=2020, step=4, name='Birth Year (Generation): '
),
value=2000,
)
# drop down to select year
select_election_year = alt.selection_point(
fields=['Year_of_Study'],
bind= alt.binding_range(min=2000, max=2020, step=4, name='Election Year: '
),
value=2020,
)
# drop down to select income
incomes = [None, '0 to 16 percentile',
'17 to 33 percentile','34 to 67 percentile', '68 to 95 percentile', '96 to 100 percentile', 'DK; NA; refused to answer; no Pre IW',]
incomes_label =['All', '0 to 16 percentile',
'17 to 33 percentile','34 to 67 percentile', '68 to 95 percentile', '96 to 100 percentile', "Don't know or refused"]
income_dropdown = alt.binding_select(options=incomes, labels= incomes_label, name="Income Group: ", )
income_select = alt.selection_point(fields=['Income_Group'], bind=income_dropdown,
# value=[{'Income_Group': '0 to 16 percentile'}]
)
# select year from legend
select_year_legend = alt.selection_point(
fields=['Year_of_Study'],
bind='legend',
)
In [7]:
# select States
# choropleth interactive elements
select_state = alt.selection_point(
fields=['State_Code_FIPS'],
empty='none'
)
# state drop down
# drop down to select income
states = [None, 'VA', 'OR', 'CA', 'ID', 'IA', 'LA', 'NE', 'NY', 'TN', 'AZ', 'MA',
'FL', 'NC', 'NJ', 'AR', 'WI', 'PA', 'OK', 'TX', 'IL', 'KY', 'DC',
'MD', 'KS', 'AL', 'MI', 'GA', 'MS', 'MN', 'CO', 'OH', 'IN', 'SC',
'CT', 'MO', 'NM', 'WA', 'UT', 'HI', 'NV', 'AK', 'MT', 'NH', 'ME',
'DE', 'VT', 'WV', 'ND', 'WY', 'RI', 'SD',]
states_label =['All', 'VA', 'OR', 'CA', 'ID', 'IA', 'LA', 'NE', 'NY', 'TN', 'AZ', 'MA',
'FL', 'NC', 'NJ', 'AR', 'WI', 'PA', 'OK', 'TX', 'IL', 'KY', 'DC',
'MD', 'KS', 'AL', 'MI', 'GA', 'MS', 'MN', 'CO', 'OH', 'IN', 'SC',
'CT', 'MO', 'NM', 'WA', 'UT', 'HI', 'NV', 'AK', 'MT', 'NH', 'ME',
'DE', 'VT', 'WV', 'ND', 'WY', 'RI', 'SD',]
states_dropdown = alt.binding_select(options=states, labels= states_label, name="State: ", )
select_state_drop = alt.selection_point(fields=['State_Code'], bind=states_dropdown
)
In [8]:
# drop down to select Education
educations = [None, 'College or advanced degree', 'High school', 'Some college',
'Grade school or less', 'DK; NA']
education_labels =['All', 'College or advanced degree', 'High school', 'Some college',
'Grade school or less', 'DK; NA']
education_dropdown = alt.binding_select(options=educations, labels= education_labels, name="Level of Education: ", )
select_education = alt.selection_point(fields=['Education'], bind=education_dropdown,
)
In [9]:
#create brush that grabs year or age info from x axis
brush = alt.selection_interval(
encodings=['x']
)
Question 3.1 Plot Code¶
In [10]:
# generation vs vote share plot
loess = alt.Chart(processed_data).add_params(
select_election_year,
income_select
).transform_filter(
alt.datum.Year_of_Study >=2000,
alt.datum.Age >=18
).mark_circle(opacity=0.25, size = 50).encode(
alt.X('birth_year:Q', scale=alt.Scale(zero=False), axis=alt.Axis(format='d'), title = "Year of Birth"),
alt.Y('share_repubs', title = "Republican Vote Share"),
opacity=alt.condition(income_select & brush & (select_election_year), alt.value(1), alt.value(0.05)),
color=alt.when(income_select & brush & (select_election_year)).then('Year_of_Study:N', legend=alt.Legend(title='Election Year')).otherwise(alt.ColorValue("lightgray")),
tooltip=[
alt.Tooltip('State_Code:O', title='State'),
alt.Tooltip('birth_year:Q', title='Year of Birth'),
alt.Tooltip('Age:Q', title='Age at Election'),
alt.Tooltip('Value:Q', title='Avg Rating', format='.2f'),
alt.Tooltip('Year_of_Study:N', title='Election Year')
]
).add_params(
brush
).properties(
title = "Republican Vote Share by Generation"
)
generation_plot = loess+loess.transform_loess('birth_year', 'share_repubs', groupby=['Year_of_Study']).mark_line(size=4)
# generation_plot
In [11]:
# age and vote share plot
age_plot = alt.Chart(processed_data).transform_filter(
alt.datum.Year_of_Study >=1990,
alt.datum.Age >=18
).mark_circle(size = 30).add_params(
brush, select_election_year, income_select
).encode(
alt.X('Age', title = 'Age (years)'),
alt.Y('share_repubs', title = "Republican Vote Share"),
color=alt.when(income_select & brush & select_election_year).then("Year_of_Study:N", legend=alt.Legend(title='Election Year')).otherwise(alt.ColorValue("gainsboro")),
opacity=alt.condition(income_select & brush & select_election_year, alt.value(1), alt.value(0.05)),
tooltip=[
alt.Tooltip('State_Code:O', title='State'),
alt.Tooltip('birth_year:Q', title='Year of Birth'),
alt.Tooltip('Age:Q', title='Age at Election'),
alt.Tooltip('Value:Q', title='Avg Rating', format='.2f'),
alt.Tooltip('Year_of_Study:N', title='Election Year')
]
).properties(
title = "Republican Vote Share by Age"
)
# age_plot
Plot for Question 3.1¶
In [12]:
(age_plot| generation_plot)
Out[12]: